*************************************************
* Purpose -- To prepare QCEW annual data for 
* analysis at the state level
************************************************
clear all
* RUN 00_path_master.do FIRST TO GET FILEPATHS

************************************************
* NAICS data
************************************************

* keeping state aggregate employment data, statewise data for restaurant sector, and national data for restaurant sector
* for codes, see - https://www.bls.gov/cew/classifications/aggregation/agg-level-titles.htm
import delimited using "$raw/qcew/1990.annual.singlefile.csv", clear
keep if (agglvl_code == 51) | ///
(agglvl_code == 55 & industry_code == "722")

forvalues year = 1991/2019 {
	display "Working with `year'"
	preserve
		import delimited using "$raw/qcew/`year'.annual.singlefile.csv", clear
		keep if (agglvl_code == 51) | ///
		(agglvl_code == 55 & industry_code == "722")
		
		tempfile year
		save `year'
	restore
	
	append using `year', force
}

* within each aggregation level, there are data for different ownership codes, adding them up

collapse (sum) annual_avg_estabs annual_avg_emplvl total_annual_wages, by(area_fips year agglvl_code) 
//now the data is at the area-year-aggregation level

* adding state names
preserve
	import excel using "$raw/qcew/area-titles-xlsx.xlsx", firstrow clear
	tempfile names
	save `names'
restore

merge m:1 area_fips using `names', assert(2 3) keep(3) nogen

* saving an intermediate version so we don't have to run the time-consuming code above again
save "$raw/qcew/state_annual_appended.dta", replace
*/
use "$raw/qcew/state_annual_appended.dta", clear


************************************************
* Merging with minimum wage data
************************************************

preserve
	use "$raw/other/mw_state_annual_vz.dta", clear
	tostring statefips, replace
	replace statefips = statefips + "000" if strlen(statefips) == 2
	replace statefips = "0" + statefips + "000" if strlen(statefips) == 1
	rename statefips area_fips
	
	tempfile minwage
	save `minwage'
	
restore

merge m:1 area_fips year using `minwage'
drop if _merge == 1 | _merge == 2
rename max_mw mw
drop *_mw
// dropped - pre-1980 data, Puerto Rico, Virgin Islands, and "FBI - undesignated location"

drop _merge

gen fed_mw = .

replace fed_mw = 3.10 if year == 1980
replace fed_mw = 3.35 if year >= 1981 & year <=1989
replace fed_mw = 3.80 if year == 1990
replace fed_mw = 4.25 if year >=1991 & year<=1995
replace fed_mw = 4.75 if year == 1996
replace fed_mw = 5.15 if year>=1997 & year<=2006
replace fed_mw = 5.85 if year == 2007
replace fed_mw = 6.55 if year == 2008
replace fed_mw = 7.25 if year>=2009

// source - https://www.dol.gov/agencies/whd/minimum-wage/history/chart


************************************************
* Merging with population data
************************************************

preserve
	use "$raw/other/CPS_POPULATION_STATE_16PLUS.dta", clear
	tostring statefip, gen(area_fips)
	replace area_fips = area_fips + "000" if strlen(area_fips) == 2
	replace area_fips = "0" + area_fips + "000" if strlen(area_fips) == 1
	rename wtfinl pop
	drop statefip
	
	tempfile pop
	save `pop'
	
restore

merge m:1 area_fips year using `pop'
drop if _merge == 2 //dropped - years 2020-2024
drop _merge

************************************************
* Merging with price index
************************************************

preserve

	import excel using "$raw/other/r-cpi-u-rs-allitems.xlsx", cellrange(A6) firstrow clear
	rename YEAR - AVG, lower
	keep year avg
	keep if inrange(year,1990,2019)
	
	* reindex to have 2023 as 100
	gen index = (avg/449.3)*100 //449.3 is index for 2023
	replace index = index/100
	
	drop avg
	tempfile cpi_rs
	save `cpi_rs'
restore

merge m:1 year using `cpi_rs'
drop _merge

************************************************
* State abbreviations
************************************************

preserve
	import delimited using "$raw/other/state_abbreviations.csv", clear varnames(1)
	replace statename = "District of Columbia" if statename == "DC"
	tempfile abb
	save `abb'
restore

merge m:1 statename using `abb', assert(1 3) nogen


************************************************
* Reshaping data to area-year observations
************************************************

reshape wide annual_avg* total_annual_wages, i(area_fips year) j(agglvl_code)


foreach var in annual_avg_estabs55 annual_avg_emplvl55 total_annual_wages55 {
	replace `var'=. if inlist(statename,"Alaska","Delaware","Rhode Island") & year <=2000
} //Delaware and Rhode Island have restaurant data missing or incomplete for the 1990s, we will merge in data constructed from sub-sectors below



************************************************
* Merging with bridged data from RI, DE
* for the 1990s, constructed from sub-sectors
************************************************
merge 1:1 area_fips year using "$clean/qcew/AK_DE_RI_90s_scaled.dta", update assert(1 4) nogen

g _statefip = substr(area_fips, 1, 2)
g statefips = int(real(_statefip))

drop if statename == "District of Columbia"
save "$clean/qcew/qcew_annual_clean_state_fig_A1.dta", replace

drop if inlist(statename, "District of Columbia", "Alaska", "Hawaii")
save "$clean/qcew/qcew_annual_clean_state.dta", replace


